package com.xpchenfrank.utils;

import org.apache.http.Header;
import org.apache.http.HttpEntity;
import org.apache.http.HttpHost;
import org.apache.http.HttpResponse;
import org.apache.http.ParseException;
import org.apache.http.client.ClientProtocolException;
import org.apache.http.client.HttpClient;
import org.apache.http.client.entity.DeflateDecompressingEntity;
import org.apache.http.client.entity.GzipDecompressingEntity;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.conn.params.ConnRouteParams;
import org.apache.http.entity.StringEntity;
import org.apache.http.impl.client.DefaultHttpClient;
import org.apache.http.impl.conn.tsccm.ThreadSafeClientConnManager;
import org.apache.http.params.CoreConnectionPNames;
import org.apache.http.params.CoreProtocolPNames;
import org.apache.http.protocol.HTTP;
import org.apache.http.util.CharArrayBuffer;
import org.springframework.stereotype.Component;

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileOutputStream;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.io.UnsupportedEncodingException;
import java.net.URL;
import java.net.URLConnection;
import java.nio.charset.Charset;
import java.nio.charset.UnsupportedCharsetException;

/**
 * Utility for fetching HTTP page content (adapted from code found online).
 *
 * @author xpchen
 * @time 2018/4/20
 * @Description
 * @copyright Wuxi Yazuo ,Ltd.copyright 2015-2025
 */
@Component
@SuppressWarnings("deprecation")
public class CrawlMethodManager {

    // Host of the company proxy currently in use; "" until an address has been
    // obtained from ipUrl. Static, and reassigned by the proxy-based request methods.
    static String ip = "";
    // Port belonging to ip; 0 until a proxy address has been obtained.
    static int port = 0;
    // Endpoint that is queried (through doGet) for a proxy address. The callers
    // split the response on ':' and use the first two parts as host and port.
    static String ipUrl = "http://localhost:8080/ipFilter/getIp/getIp";


    // Shared client backed by a thread-safe connection manager; used by the
    // POST-through-company-proxy code paths, which also set its proxy and timeouts.
    static HttpClient httpPostClient = new DefaultHttpClient(
            new ThreadSafeClientConnManager());

    /**
     * Fetches a page with HTTP GET, trying the enabled routes in a fixed order:
     * GoAgent proxy first, then the company proxy, then a direct request from
     * this machine. A later route is only tried while no content has been
     * obtained yet, and each route is attempted at most its own number of times.
     *
     * <p>A failed attempt is logged and does not abort the sequence. An attempt
     * that returns {@code null} is treated like an empty result, the same way
     * the two-argument overload treats it.
     *
     * @param url         link to fetch
     * @param encode      charset name handed to the individual fetch routes
     * @param goagentFlag whether the GoAgent proxy route is enabled
     * @param goagentNum  maximum number of GoAgent attempts
     * @param companyFlag whether the company proxy route is enabled
     * @param companyNum  maximum number of company-proxy attempts
     * @param localFlag   whether the direct (local) route is enabled
     * @param localNum    maximum number of direct attempts
     * @return the first non-empty content obtained, or {@code ""} when every
     *         enabled route failed
     * @throws ClientProtocolException kept in the signature for existing callers;
     *                                 exceptions of individual attempts are caught here
     * @throws IOException             kept in the signature for existing callers;
     *                                 exceptions of individual attempts are caught here
     */

    public String crawlPageContentByGet(String url, String encode,
                                        boolean goagentFlag, int goagentNum, boolean companyFlag,
                                        int companyNum, boolean localFlag, int localNum)
            throws ClientProtocolException, IOException {
        String content = "";
        if (goagentFlag) {
            int goagentCount = 0;
            // The null check matters: the loop condition runs outside the try block,
            // so a null result used to surface as a NullPointerException here.
            while ((content == null || content.isEmpty()) && goagentCount < goagentNum) {
                try {
                    System.out.println("goagent正在请求");
                    content = doGetByGoagent(url, encode);
                } catch (Exception e) {
                    // Best effort: report the failure and move on to the next attempt.
                    System.out.println("goagent请求失败: " + e);
                }
                goagentCount++;
            }
        }
        if (companyFlag && (content == null || content.isEmpty())) {
            int companyCount = 0;
            while ((content == null || content.isEmpty()) && companyCount < companyNum) {
                try {
                    System.out.println("公司代理ip正在请求");
                    content = getByCompanyProxy(url, encode);
                } catch (Exception e) {
                    System.out.println("公司代理ip请求失败: " + e);
                }
                companyCount++;
            }
        }
        if (localFlag && (content == null || content.isEmpty())) {
            int localCount = 0;
            while ((content == null || content.isEmpty()) && localCount < localNum) {
                try {
                    System.out.println("本机正在请求");
                    content = doGet(url, encode);
                } catch (Exception e) {
                    System.out.println("本机请求失败: " + e);
                }
                localCount++;
            }
        }
        // Callers have always received "" on total failure; never hand back null.
        return content == null ? "" : content;
    }

    /**
     * Fetches a page with HTTP GET: GoAgent proxy first, company proxy as the
     * fallback. The direct (local) route is not used by this overload.
     *
     * <p>Fallback order:
     * <ol>
     *   <li>GoAgent; if it returns {@code null} or an empty string, the company
     *       proxy is asked once.</li>
     *   <li>If anything in step 1 throws, the company proxy is asked, and asked
     *       a second time if that call throws as well.</li>
     *   <li>If the second company-proxy call also throws, the stack trace is
     *       printed and whatever content was held at that point is returned.</li>
     * </ol>
     *
     * @param url    link to fetch
     * @param encode charset name handed to the fetch routes
     * @return the fetched content; may be empty when every route failed
     * @throws ClientProtocolException declared by the signature; exceptions raised
     *                                 by the fetch attempts are caught inside
     * @throws IOException             declared by the signature; exceptions raised
     *                                 by the fetch attempts are caught inside
     * @author joe
     */
    public String crawlPageContentByGet(String url, String encode)
            throws ClientProtocolException, IOException {
        String pageText = "";
        try {
            pageText = doGetByGoagent(url, encode);
            boolean goagentCameBackEmpty = pageText == null || pageText.isEmpty();
            if (goagentCameBackEmpty) {
                System.out.println("启用公司代理");
                pageText = getByCompanyProxy(url, encode);
            }
        } catch (Exception goagentFailure) {
            try {
                System.out.println("goagent连接失败,启用公司代理");
                pageText = getByCompanyProxy(url, encode);
            } catch (Exception firstProxyFailure) {
                try {
                    // One more try through the company proxy before giving up.
                    pageText = getByCompanyProxy(url, encode);
                } catch (Exception secondProxyFailure) {
                    secondProxyFailure.printStackTrace();
                }
            }
        }
        return pageText;
    }

    /**
     * Fetches {@code url} with HTTP GET through a company proxy, making up to
     * eleven requests (the loop bound is inclusive).
     *
     * <p>The proxy in use is kept in the static {@code ip}/{@code port} fields.
     * A fresh address is requested from {@code ipUrl} when none is known yet,
     * when the current one was rejected with a protocol error, or when it has
     * accumulated more than two failed uses. If no usable address can be
     * obtained the method gives up and returns {@code ""} instead of sending
     * the request without a proxy.
     *
     * <p>A 200 response is returned once {@code resultTest} accepts its body.
     * A body containing {@code "function JumpSelf"} (apparently a WebShield
     * verification page) makes the next attempt reuse the same proxy, with the
     * verify token appended to the URL when the page carries one.
     *
     * <p>NOTE(review): {@code ip} and {@code port} are static and written here
     * without synchronization — confirm this is only called from one thread.
     *
     * @param url    link to fetch
     * @param encode charset name used for the request parameters and for decoding
     * @return the accepted page body, or {@code ""} when no attempt succeeded
     */
    private String getByCompanyProxy(String url, String encode) {
        int count = 10;
        String result = "";
        String urlString = url;
        HttpHost proxyHost = null;
        boolean newProxy = false;
        int oldProxyUsecount = 0;
        for (int i = 0; i <= count; i++) {
            if (!ip.equals("")) {
                proxyHost = new HttpHost(ip, port, null);
            }

            DefaultHttpClient httpClient = null;
            try {
                if (newProxy || oldProxyUsecount > 2 || ip.equals("")) {
                    // Always ask for a fresh address here; re-parsing a previously
                    // fetched string would "rotate" onto the very same proxy.
                    HttpHost fetched = fetchCompanyProxyHost();
                    if (fetched == null) {
                        // Without a proxy the request would go out directly,
                        // which is not what this route is for.
                        System.out.println("代理ip提取失败");
                        return "";
                    }
                    oldProxyUsecount = 0;
                    // The request for a new proxy has been satisfied.
                    newProxy = false;
                    ip = fetched.getHostName();
                    port = fetched.getPort();
                    proxyHost = fetched;
                }
                System.out.println("正在使用代理" + ip + ":" + port);
                HttpGet httpRequst = new HttpGet(urlString);
                httpRequst.addHeader("Accept-Encoding", "gzip,deflate,sdch");
                httpRequst.getParams().setParameter(
                        CoreProtocolPNames.HTTP_CONTENT_CHARSET, encode);
                httpClient = new DefaultHttpClient();
                httpClient.getParams().setParameter(
                        CoreConnectionPNames.CONNECTION_TIMEOUT, 9000);// connect timeout: 9 s
                httpClient.getParams().setParameter(
                        CoreConnectionPNames.SO_TIMEOUT, 9000);// socket read timeout: 9 s
                httpClient.getParams().setParameter(
                        ConnRouteParams.DEFAULT_PROXY, proxyHost);
                HttpResponse httpResponse = httpClient.execute(httpRequst);
                if (httpResponse.getStatusLine().getStatusCode() == 200) {
                    HttpEntity httpEntity = httpResponse.getEntity();
                    // This client does not decompress on its own; wrap the entity
                    // according to the Content-Encoding the server reported.
                    if (httpEntity.getContentEncoding() != null) {
                        if ("gzip".equalsIgnoreCase(httpEntity
                                .getContentEncoding().getValue())) {
                            httpEntity = new GzipDecompressingEntity(httpEntity);
                        } else if ("deflate".equalsIgnoreCase(httpEntity
                                .getContentEncoding().getValue())) {
                            httpEntity = new DeflateDecompressingEntity(
                                    httpEntity);
                        }
                    }
                    result = enCodetoString(httpEntity, encode);// read the response body
                    if (resultTest(result)) {
                        System.out.println(ip + "公司代理成功抓取" + url);
                        return result;
                    } else if (result.contains("function JumpSelf")
                            && result.contains("WebShieldSessionVerify")) {
                        int indexs = result.indexOf("&WebShieldSessionVerify");
                        int indexe = result.indexOf("\";}</script>");
                        // Only cut the token out when both markers are present and
                        // in order; otherwise substring would throw.
                        if (indexs >= 0 && indexe >= indexs) {
                            String verify = result.substring(indexs, indexe);
                            urlString = urlString + verify;
                        }
                        newProxy = false;
                    } else if (result.contains("function JumpSelf")) {
                        // Jump page without a verify token: start over with the plain URL.
                        urlString = url;
                        newProxy = false;
                    } else {
                        System.out.println("网页含有错误特殊字符" + urlString);
                        // NOTE(review): together with the increment after this
                        // if/else a rejected page counts twice — presumably so a
                        // proxy serving junk is rotated sooner; confirm.
                        oldProxyUsecount++;
                        System.out.println(result);
                    }
                } else {
                    System.out.println(httpResponse.getStatusLine()
                            .getStatusCode() + " " + urlString + " 状态不为200");
                }
                oldProxyUsecount++;
            } catch (ClientProtocolException e) {
                newProxy = true;
                System.out.println(ip + "代理ip拒绝了");
            } catch (IOException e) {
                oldProxyUsecount++;
                System.out.println(ip + "代理读取超时");
            } finally {
                // Each attempt owns its client; shutting the connection manager
                // down releases the socket on every path, including early return.
                if (httpClient != null) {
                    httpClient.getConnectionManager().shutdown();
                }
            }
        }
        return "";
    }

    /**
     * Asks the proxy service at {@code ipUrl} for an address, retrying a bounded
     * number of times. The response is cleaned of quotes, slashes, CRLF and
     * spaces, split on ':', and its first two parts are used as host and port.
     *
     * @return the proxy host, or {@code null} when no usable address was obtained
     */
    private HttpHost fetchCompanyProxyHost() {
        final int maxFetchAttempts = 5;
        for (int attempt = 0; attempt < maxFetchAttempts; attempt++) {
            System.out.println("ip为空,正在提取");
            String proxy;
            try {
                proxy = doGet(ipUrl, "gbk");
            } catch (IOException e) {
                System.out.println("代理ip提取失败: " + e);
                continue;
            }
            if (proxy == null || !proxy.contains(":")) {
                continue;
            }
            String[] proxys = proxy.replaceAll("\"|//|/|\r\n| ", "").split(":");
            if (proxys.length < 2 || proxys[0].trim().isEmpty()) {
                continue;
            }
            try {
                return new HttpHost(proxys[0].trim(),
                        Integer.parseInt(proxys[1].trim()), null);
            } catch (NumberFormatException e) {
                // Malformed port in the answer; ask the service again.
                System.out.println("代理ip提取失败: " + e);
            }
        }
        return null;
    }

    /**
     * Fetches {@code url} with a direct HTTP GET (no proxy).
     *
     * <p>Only a 200 response is read; gzip and deflate bodies are unwrapped
     * before decoding. I/O and protocol failures are logged and reported to
     * the caller as an empty result rather than as an exception.
     *
     * @param url    link to fetch
     * @param encode charset name handed to {@code enCodetoString} for decoding
     * @return the response body, or {@code ""} on a non-200 status or an I/O failure
     * @throws ClientProtocolException declared by the signature; caught internally
     * @throws IOException             declared by the signature; caught internally
     */
    private String doGet(String url, String encode)
            throws ClientProtocolException, IOException {
        String result = "";
        DefaultHttpClient httpClient = new DefaultHttpClient();
        try {
            HttpGet httpRequst = new HttpGet(url);
            httpClient.getParams().setParameter(
                    CoreConnectionPNames.CONNECTION_TIMEOUT, 8000);// connect timeout: 8 s
            httpClient.getParams().setParameter(
                    CoreConnectionPNames.SO_TIMEOUT, 8000);// socket read timeout: 8 s
            HttpResponse httpResponse = httpClient.execute(httpRequst);
            if (httpResponse.getStatusLine().getStatusCode() == 200) {
                HttpEntity httpEntity = httpResponse.getEntity();
                if (httpEntity.getContentEncoding() != null) {
                    if ("gzip".equalsIgnoreCase(httpEntity.getContentEncoding()
                            .getValue())) {
                        httpEntity = new GzipDecompressingEntity(httpEntity);
                    } else if ("deflate".equalsIgnoreCase(httpEntity
                            .getContentEncoding().getValue())) {
                        httpEntity = new DeflateDecompressingEntity(httpEntity);
                    }
                }

                result = enCodetoString(httpEntity, encode);// read the response body
            }
        } catch (IOException e) {
            // ClientProtocolException is an IOException, so one handler covers both.
            System.out.println("doget代理读取超时: " + e);
        } finally {
            // The client is created per call; shut its connection manager down so
            // the socket is released on every path (this also covers non-200).
            httpClient.getConnectionManager().shutdown();
        }
        return result;
    }

    /**
     * Fetches {@code url} with HTTP GET through the local GoAgent proxy at
     * {@code 127.0.0.1:8087}.
     *
     * <p>Only a 200 response is read; gzip and deflate bodies are unwrapped
     * before decoding. Unlike {@code doGet}, failures are propagated.
     *
     * @param url    link to fetch
     * @param encode charset name used for the request parameters and for decoding
     * @return the response body, or {@code ""} when the status is not 200
     * @throws ClientProtocolException on an HTTP protocol error
     * @throws IOException             on a connection problem or timeout
     */
    private String doGetByGoagent(String url, String encode)
            throws ClientProtocolException, IOException {
        String result = "";
        HttpGet httpRequst = new HttpGet(url);
        httpRequst.addHeader("Accept-Encoding", "gzip,deflate,sdch");
        httpRequst.getParams().setParameter(
                CoreProtocolPNames.HTTP_CONTENT_CHARSET, encode);
        HttpHost proxyHost = new HttpHost("127.0.0.1", 8087, null);
        DefaultHttpClient httpClient = new DefaultHttpClient();
        try {
            httpClient.getParams().setParameter(
                    CoreConnectionPNames.CONNECTION_TIMEOUT, 8000);// connect timeout: 8 s
            httpClient.getParams().setParameter(CoreConnectionPNames.SO_TIMEOUT,
                    6000);// socket read timeout: 6 s
            httpClient.getParams().setParameter(ConnRouteParams.DEFAULT_PROXY,
                    proxyHost);
            HttpResponse httpResponse = httpClient.execute(httpRequst);
            if (httpResponse.getStatusLine().getStatusCode() == 200) {
                HttpEntity httpEntity = httpResponse.getEntity();
                if (httpEntity.getContentEncoding() != null) {
                    if ("gzip".equalsIgnoreCase(httpEntity.getContentEncoding()
                            .getValue())) {
                        httpEntity = new GzipDecompressingEntity(httpEntity);
                    } else if ("deflate".equalsIgnoreCase(httpEntity
                            .getContentEncoding().getValue())) {
                        httpEntity = new DeflateDecompressingEntity(httpEntity);
                    }
                }
                result = enCodetoString(httpEntity, encode);// read the response body
            }
            return result;
        } finally {
            // The client is created per call; release its connection on every
            // path, including when execute or decoding throws.
            httpClient.getConnectionManager().shutdown();
        }
    }

    /**
     * Submits an HTTP POST through the GoAgent proxy, retrying on failure.
     *
     * <p>Retry order:
     * <ol>
     *   <li>First call; if it returns {@code null} or an empty string, the call
     *       is repeated once.</li>
     *   <li>If anything in step 1 throws, the call is made again, and once more
     *       if that one throws too.</li>
     *   <li>If the last call also throws, the stack trace is printed and
     *       whatever content was held at that point is returned.</li>
     * </ol>
     *
     * @param url    target of the POST
     * @param pram   request body
     * @param encode charset name handed to the POST routine
     * @return the response content; may be empty when every attempt failed
     * @throws ClientProtocolException declared by the signature; exceptions raised
     *                                 by the attempts are caught inside
     * @throws IOException             declared by the signature; exceptions raised
     *                                 by the attempts are caught inside
     */
    public String crawlPageContentByPost(String url, String pram, String encode)
            throws ClientProtocolException, IOException {
        String responseText = "";
        try {
            responseText = doPostByGoagent(url, pram, encode);
            boolean cameBackEmpty = responseText == null || responseText.isEmpty();
            if (cameBackEmpty) {
                responseText = doPostByGoagent(url, pram, encode);
            }
        } catch (Exception firstFailure) {
            try {
                responseText = doPostByGoagent(url, pram, encode);
            } catch (Exception secondFailure) {
                try {
                    responseText = doPostByGoagent(url, pram, encode);
                } catch (Exception lastFailure) {
                    lastFailure.printStackTrace();
                }
            }
        }
        return responseText;
    }

    /**
     * Sends {@code parm} as a form-encoded HTTP POST to {@code url} through the
     * local GoAgent proxy at {@code 127.0.0.1:8087}.
     *
     * <p>Only a 200 response is read; gzip and deflate bodies are unwrapped
     * before decoding.
     *
     * @param url    target of the POST
     * @param parm   request body, sent as {@code application/x-www-form-urlencoded}
     * @param encode charset name used to encode the body and to decode the response
     * @return the response body, or {@code ""} when the status is not 200
     * @throws ClientProtocolException on an HTTP protocol error
     * @throws IOException             on a connection problem or timeout
     */
    private String doPostByGoagent(String url, String parm, String encode)
            throws ClientProtocolException, IOException {
        String result = "";
        HttpPost httpRequst = new HttpPost(url);
        HttpHost proxy = new HttpHost("127.0.0.1", 8087, null);
        // Encode the body with the requested charset. The charset must not be put
        // into the Content-Encoding header: that header names a compression
        // coding (gzip, deflate, ...), not a character set.
        StringEntity entity = new StringEntity(parm, encode);
        entity.setContentType("application/x-www-form-urlencoded");
        httpRequst.setEntity(entity);
        DefaultHttpClient httpClient = new DefaultHttpClient();
        try {
            httpClient.getParams().setParameter(
                    CoreConnectionPNames.CONNECTION_TIMEOUT, 8000);// connect timeout: 8 s
            httpClient.getParams().setParameter(CoreConnectionPNames.SO_TIMEOUT,
                    8000);// socket read timeout: 8 s
            httpClient.getParams().setParameter(ConnRouteParams.DEFAULT_PROXY,
                    proxy);
            HttpResponse httpResponse = httpClient.execute(httpRequst);
            if (httpResponse.getStatusLine().getStatusCode() == 200) {
                HttpEntity httpEntity = httpResponse.getEntity();
                if (httpEntity.getContentEncoding() != null) {
                    if ("gzip".equalsIgnoreCase(httpEntity.getContentEncoding()
                            .getValue())) {
                        httpEntity = new GzipDecompressingEntity(httpEntity);
                    } else if ("deflate".equalsIgnoreCase(httpEntity
                            .getContentEncoding().getValue())) {
                        httpEntity = new DeflateDecompressingEntity(httpEntity);
                    }
                }
                result = enCodetoString(httpEntity, encode);// read the response body
            }
            return result;
        } finally {
            // The client is created per call; release its connection on every
            // path, including a non-200 status whose body is never read.
            httpClient.getConnectionManager().shutdown();
        }
    }

    /**
     * Sends {@code parm} as a form-encoded HTTP POST directly to {@code url}
     * (no proxy).
     *
     * <p>Only a 200 response is read; gzip and deflate bodies are unwrapped
     * before decoding.
     *
     * @param url    target of the POST
     * @param parm   request body, sent as {@code application/x-www-form-urlencoded}
     * @param encode charset name used to encode the body and to decode the response
     * @return the response body, or {@code ""} when the status is not 200
     * @throws ClientProtocolException on an HTTP protocol error
     * @throws IOException             on a connection problem or timeout
     */
    public String doPost(String url, String parm, String encode)
            throws ClientProtocolException, IOException {
        String result = "";
        HttpPost httpRequst = new HttpPost(url);
        // Encode the body with the requested charset. The charset must not be put
        // into the Content-Encoding header: that header names a compression
        // coding (gzip, deflate, ...), not a character set.
        StringEntity entity = new StringEntity(parm, encode);
        entity.setContentType("application/x-www-form-urlencoded");
        httpRequst.setEntity(entity);
        DefaultHttpClient httpClient = new DefaultHttpClient();
        try {
            httpClient.getParams().setParameter(
                    CoreConnectionPNames.CONNECTION_TIMEOUT, 8000);// connect timeout: 8 s
            httpClient.getParams().setParameter(CoreConnectionPNames.SO_TIMEOUT,
                    8000);// socket read timeout: 8 s
            HttpResponse httpResponse = httpClient.execute(httpRequst);
            if (httpResponse.getStatusLine().getStatusCode() == 200) {
                HttpEntity httpEntity = httpResponse.getEntity();
                if (httpEntity.getContentEncoding() != null) {
                    if ("gzip".equalsIgnoreCase(httpEntity.getContentEncoding()
                            .getValue())) {
                        httpEntity = new GzipDecompressingEntity(httpEntity);
                    } else if ("deflate".equalsIgnoreCase(httpEntity
                            .getContentEncoding().getValue())) {
                        httpEntity = new DeflateDecompressingEntity(httpEntity);
                    }
                }
                result = enCodetoString(httpEntity, encode);// read the response body
            }
            return result;
        } finally {
            // The client is created per call; release its connection on every
            // path, including a non-200 status whose body is never read.
            httpClient.getConnectionManager().shutdown();
        }
    }

    /**
     * Sends {@code parm} as a form-encoded HTTP POST to {@code url} through the
     * shared {@code httpPostClient}, making up to six attempts (the loop bound
     * is inclusive).
     *
     * <p>The proxy and timeouts on the shared client are whatever
     * {@code postByCompanyProxyBoolean} last configured. That method is called
     * when no proxy is known, after a protocol error, or after more than two
     * unusable attempts, and the POST is only sent once it has returned
     * {@code true}.
     *
     * <p>A 200 body accepted by {@code resultTest} is returned. On a 302 whose
     * {@code Location} contains {@code tabid=26} the redirect target is fetched
     * with {@code getByHttpClient} and returned if accepted.
     *
     * <p>NOTE(review): the 200 branches update {@code urlString}, but the POST
     * is always sent to {@code url}; confirm whether the verify token was meant
     * to reach the request.
     *
     * @param url    target of the POST
     * @param parm   request body, sent as {@code application/x-www-form-urlencoded}
     * @param encode charset name used for the request parameters and for decoding
     * @return the accepted page body, or {@code ""} when no attempt succeeded
     * @throws ClientProtocolException may propagate from {@code postByCompanyProxyBoolean},
     *                                 which is called inside this method's own catch scope
     * @throws IOException             declared by the signature; I/O failures of
     *                                 the attempts are caught and logged here
     */
    @SuppressWarnings("unused")
    private String postByCompanyProxy(String url, String parm, String encode)
            throws ClientProtocolException, IOException {
        int count = 5;
        String result = "";
        String urlString = url;
        boolean okProxy = false;
        boolean newProxy = false;
        int oldProxyUsecount = 0;
        for (int i = 0; i <= count; i++) {

            try {
                if (newProxy || oldProxyUsecount > 2 || ip.equals("")) {
                    okProxy = postByCompanyProxyBoolean(url, parm, encode);
                }
                if (okProxy) {
                    System.out.println("正在使用代理" + ip + ":" + port);
                    HttpPost httpRequst = new HttpPost(url);
                    StringEntity entity = new StringEntity(parm);
                    entity.setContentType("application/x-www-form-urlencoded");
                    httpRequst.setEntity(entity);
                    httpRequst.getParams().setParameter(
                            CoreProtocolPNames.HTTP_CONTENT_CHARSET, encode);
                    HttpResponse httpResponse = httpPostClient
                            .execute(httpRequst);
                    int status = httpResponse.getStatusLine().getStatusCode();
                    if (status == 200) {
                        HttpEntity httpEntity = httpResponse.getEntity();
                        if (httpEntity.getContentEncoding() != null) {
                            if ("gzip".equalsIgnoreCase(httpEntity
                                    .getContentEncoding().getValue())) {
                                httpEntity = new GzipDecompressingEntity(
                                        httpEntity);
                            } else if ("deflate".equalsIgnoreCase(httpEntity
                                    .getContentEncoding().getValue())) {
                                httpEntity = new DeflateDecompressingEntity(
                                        httpEntity);
                            }
                        }
                        result = enCodetoString(httpEntity, encode);// read the response body
                        if (resultTest(result)) {
                            return result;
                        } else if (result.contains("function JumpSelf")
                                && result.contains("WebShieldSessionVerify")) {
                            int indexs = result
                                    .indexOf("&WebShieldSessionVerify");
                            int indexe = result.indexOf("\";}</script>");
                            // Only cut the token out when both markers are present
                            // and in order; otherwise substring would throw.
                            if (indexs >= 0 && indexe >= indexs) {
                                String verify = result.substring(indexs, indexe);
                                urlString = urlString + verify;
                            }
                            newProxy = false;
                        } else if (result.contains("function JumpSelf")) {
                            urlString = url;
                            newProxy = false;
                        }
                    } else if (status == 302) {
                        System.out.println("重定向了");
                        Header header = httpResponse.getFirstHeader("location");
                        // The redirect body is never read, so give the pooled
                        // connection back before the shared client is used again.
                        httpRequst.abort();
                        if (header != null) {
                            urlString = header.getValue();
                            System.out.println(urlString);
                            if (urlString.contains("tabid=26")) {
                                // Prefix the host only for a relative Location, as
                                // postByCompanyProxyBoolean does; prefixing an
                                // absolute one produces a malformed URL.
                                if (!urlString.contains("landchina")) {
                                    urlString = "http://www.landchina.com"
                                            + urlString;
                                }
                                result = getByHttpClient(urlString, encode,
                                        httpPostClient);
                                if (resultTest(result)) {
                                    System.out.println(i + "公司代理成功抓取" + url);
                                    return result;
                                }
                            }
                            newProxy = false;
                        }
                    } else {
                        httpRequst.abort();
                    }
                } else {
                    oldProxyUsecount++;
                }
            } catch (ClientProtocolException e) {
                newProxy = true;
                System.out.println(ip + "代理ip拒绝了");
            } catch (IOException e) {
                oldProxyUsecount++;
                System.out.println(ip + "代理读取超时");
            }
        }
        return "";
    }

    /**
     * Fetches {@code url} with HTTP GET on the supplied client, making up to
     * three attempts (the loop bound is inclusive).
     *
     * <p>A 200 body accepted by {@code resultTest} is returned. A body
     * containing {@code "function JumpSelf"} (apparently a WebShield
     * verification page) changes the URL for the next attempt: the verify token
     * is appended when the page carries one, otherwise the original URL is
     * restored. Protocol and I/O failures are logged and the loop continues.
     *
     * @param url        link to fetch
     * @param encode     charset name handed to {@code enCodetoString} for decoding
     * @param httpClient client to execute on; its proxy and timeout settings are
     *                   used as they are
     * @return the accepted page body, or {@code ""} when no attempt succeeded
     */
    private String getByHttpClient(String url, String encode,
                                   HttpClient httpClient) {
        int count = 2;
        String result = "";
        String urlString = url;
        for (int i = 0; i <= count; i++) {
            try {
                HttpGet httpRequst = new HttpGet(urlString);
                httpRequst.setHeader("Content-Type",
                        "application/x-www-form-urlencoded");
                HttpResponse httpResponse = httpClient.execute(httpRequst);
                if (httpResponse.getStatusLine().getStatusCode() == 200) {
                    HttpEntity httpEntity = httpResponse.getEntity();
                    if (httpEntity.getContentEncoding() != null) {
                        if ("gzip".equalsIgnoreCase(httpEntity
                                .getContentEncoding().getValue())) {
                            httpEntity = new GzipDecompressingEntity(httpEntity);
                        } else if ("deflate".equalsIgnoreCase(httpEntity
                                .getContentEncoding().getValue())) {
                            httpEntity = new DeflateDecompressingEntity(
                                    httpEntity);
                        }
                    }
                    result = enCodetoString(httpEntity, encode);// read the response body
                    if (resultTest(result)) {
                        System.out.println(ip + "公司代理成功抓取" + url);
                        return result;
                    } else if (result.contains("function JumpSelf")
                            && result.contains("WebShieldSessionVerify")) {
                        int indexs = result.indexOf("&WebShieldSessionVerify");
                        int indexe = result.indexOf("\";}</script>");
                        // Only cut the token out when both markers are present and
                        // in order; substring would otherwise throw an unchecked
                        // exception that the catch blocks below do not handle.
                        if (indexs >= 0 && indexe >= indexs) {
                            String verify = result.substring(indexs, indexe);
                            urlString = urlString + verify;
                        }
                    } else if (result.contains("function JumpSelf")) {
                        // Jump page without a verify token: go back to the plain URL.
                        urlString = url;
                    }
                } else {
                    // Body is not read; abort so the connection is released.
                    httpRequst.abort();
                }
            } catch (ClientProtocolException e) {
                System.out.println(ip + "代理ip拒绝了");
            } catch (IOException e) {
                System.out.println(ip + "代理读取超时");
            }
        }
        return "";
    }

    /**
     * 新ip第一次访问时要先通过安全验证，这时只能得到首页的内容，所以在post前线验证一次 <功能详细描述> [参数说明]
     *
     * @return void [返回类型说明]
     * @exception throws [违例类型] [违例说明]
     * @see [类、类#方法、类#成员]
     */
    private Boolean postByCompanyProxyBoolean(String url, String parm,
                                              String encode) throws ClientProtocolException, IOException {
        int count = 10;
        String result = "";
        String urlString = url;
        String proxy = "";
        HttpHost proxyHost = null;
        boolean newProxy = false;
        int oldProxyUsecount = 0;
        for (int i = 0; i <= count; i++) {

            try {
                if (newProxy || oldProxyUsecount > 2 || ip.equals("")) {
                    oldProxyUsecount = 0;
                    String[] proxys = null;
                    try {
                        while (proxy.equals("") || !proxy.contains(":")) {
                            System.out.println("ip为空,正在提取");
                            proxy = doGet(ipUrl, "gbk");
                        }
                        proxys = proxy.replaceAll("\"|//|/|\r\n| | ", "")
                                .split(":");
                    } catch (Exception e) {
                        while (proxy.equals("") || !proxy.contains(":")) {
                            System.out.println("ip为空,正在提取");
                            proxy = doGet(ipUrl, "gbk");
                        }
                        proxys = proxy.replaceAll("\"|//|/|\r\n| ", "").split(
                                ":");
                    }
                    ip = proxys[0];
                    port = Integer.parseInt(proxys[1]);
                    proxyHost = new HttpHost(ip, port, null);
                }
                System.out.println("正在使用代理" + ip + ":" + port);
                HttpPost httpRequst = new HttpPost(url);// 创建HttpPost对象
                StringEntity entity = new StringEntity(parm);
                entity.setContentType("application/x-www-form-urlencoded");
                httpRequst.setEntity(entity);
                httpRequst.getParams().setParameter(
                        CoreProtocolPNames.HTTP_CONTENT_CHARSET, encode);
                httpPostClient.getParams().setParameter(
                        CoreConnectionPNames.CONNECTION_TIMEOUT, 10000);// 连接时间20s
                httpPostClient.getParams().setParameter(
                        CoreConnectionPNames.SO_TIMEOUT, 8000);// 数据传输时间60s
                httpPostClient.getParams().setParameter(
                        ConnRouteParams.DEFAULT_PROXY, proxyHost);
                HttpResponse httpResponse = httpPostClient.execute(httpRequst);// 其中HttpGet是HttpUriRequst的子类
                if (httpResponse.getStatusLine().getStatusCode() == 200) {
                    HttpEntity httpEntity = httpResponse.getEntity();
                    if (httpEntity.getContentEncoding() != null) {
                        if ("gzip".equalsIgnoreCase(httpEntity
                                .getContentEncoding().getValue())) {
                            httpEntity = new GzipDecompressingEntity(httpEntity);
                        } else if ("deflate".equalsIgnoreCase(httpEntity
                                .getContentEncoding().getValue())) {
                            httpEntity = new DeflateDecompressingEntity(
                                    httpEntity);
                        }
                    }
                    result = enCodetoString(httpEntity, encode);// 取出应答字符串
                    // System.out.println(result);
                    if (resultTest(result)) {
                        return true;
                    } else if (result.contains("function JumpSelf")
                            && result.contains("WebShieldSessionVerify")) {
                        int indexs = result.indexOf("&WebShieldSessionVerify");
                        int indexe = result.indexOf("\";}</script>");
                        String verify = result.substring(indexs, indexe);
                        urlString = urlString + verify;
                        if (urlString.contains("tabid=26")
                                && !urlString.contains("landchina")) {
                            urlString = "http://www.landchina.com" + urlString;
                            result = getByHttpClient(urlString, encode,
                                    httpPostClient);
                            if (resultTest(result)) {
                                System.out.println(ip + "公司代理成功抓取" + url);
                                return true;
                            }
                            newProxy = false;
                        } else if (urlString.contains("tabid=26")
                                && urlString.contains("landchina")) {
                            result = getByHttpClient(urlString, encode,
                                    httpPostClient);
                            if (resultTest(result)) {
                                System.out.println(ip + "公司代理成功抓取" + url);
                                return true;
                            }
                            newProxy = false;
                        }
                        newProxy = false;
                    } else if (result.contains("function JumpSelf")
                            && !result.contains("WebShieldSessionVerify")) {
                        urlString = url;
                        newProxy = false;
                    }
                } else if (httpResponse.getStatusLine().getStatusCode() == 302) {
                    System.out.println("重定向了");
                    Header header = httpResponse.getFirstHeader("location");
                    if (header != null) {
                        urlString = header.getValue();
                        System.out.println(urlString);
                        if (urlString.contains("tabid=26")
                                && !urlString.contains("landchina")) {
                            urlString = "http://www.landchina.com" + urlString;
                            result = getByHttpClient(urlString, encode,
                                    httpPostClient);
                            if (resultTest(result)) {
                                System.out.println(ip + "公司代理成功抓取" + url);
                                return true;
                            }
                            newProxy = false;
                        } else if (urlString.contains("tabid=26")
                                && urlString.contains("landchina")) {
                            result = getByHttpClient(urlString, encode,
                                    httpPostClient);
                            if (resultTest(result)) {
                                System.out.println(ip + "公司代理成功抓取" + url);
                                return true;
                            }
                            newProxy = false;
                        }
                        newProxy = false;
                    }
                } else {
                    httpRequst.abort();
                }
            } catch (ClientProtocolException e) {
                newProxy = true;
                System.out.println(ip + "代理ip拒绝了");
            } catch (IOException e) {
                oldProxyUsecount++;
                System.out.println(ip + "代理读取超时");
            }
        }
        return false;
    }

    /**
     * Substrings whose presence marks a fetched body as unusable. The list mixes
     * error-page titles, firewall/redirect stubs and server error banners.
     */
    private static final String[] RESULT_TEST_REJECT_MARKERS = {
            "<title>blank", "Error Page Messages", "<title>404", "您的访问出错了",
            "302 Found", "出错页面", "没有找到这篇文章！", "特定于实例的错误",
            "错误 404", "Error report", "function JumpSelf", "refused",
            "网站防火墙", "无法解析服务器", "STATUS OK", "refresh",
            "DownloadError", "Not Found", "Runtime Error", "Service Unavailable"
    };

    /**
     * Decides whether a fetched response body should be accepted as page content.
     *
     * @param result the decoded response body; may be {@code null}
     * @return {@code false} when {@code result} is {@code null}, empty, exactly
     *         {@code "100"}, or contains any of the reject markers; {@code true}
     *         otherwise
     */
    private Boolean resultTest(String result) {
        // The entity decoder returns null when the entity has no content stream,
        // so a null body must be rejected instead of dereferenced.
        if (result == null || result.equals("") || result.equals("100")) {
            return false;
        }
        for (String marker : RESULT_TEST_REJECT_MARKERS) {
            if (result.contains(marker)) {
                return false;
            }
        }
        return true;
    }

    /**
     * Decodes the body of an HTTP entity to a string, taking the fallback charset
     * by name. The actual work is delegated to {@code enCodetoStringDo}.
     *
     * @param entity         the entity whose content is read
     * @param defaultCharset charset name to decode with, or {@code null} to pass
     *                       no charset to the delegate
     * @return whatever {@code enCodetoStringDo} returns for the entity
     * @throws IOException    if reading the entity content fails
     * @throws ParseException declared for callers; propagated from the delegate
     */
    public static String enCodetoString(final HttpEntity entity,
                                        final String defaultCharset) throws IOException, ParseException {
        Charset fallback = null;
        if (defaultCharset != null) {
            // Charset.forName rejects unknown or malformed names with an unchecked exception.
            fallback = Charset.forName(defaultCharset);
        }
        return enCodetoStringDo(entity, fallback);
    }

    /**
     * Reads the entire content stream of an HTTP entity and decodes it to a string.
     * <p>
     * The charset used is {@code defaultCharset} when it is non-null, otherwise
     * {@code HTTP.DEF_CONTENT_CHARSET}. The entity's own Content-Type header is
     * not consulted.
     *
     * @param entity         the entity to read; must not be {@code null}
     * @param defaultCharset charset to decode with, or {@code null} for the HTTP default
     * @return the decoded body, or {@code null} when the entity has no content stream
     * @throws IllegalArgumentException if {@code entity} is {@code null} or its
     *                                  declared length exceeds {@code Integer.MAX_VALUE}
     * @throws IOException              if reading or closing the stream fails
     * @throws ParseException           declared for callers; not thrown by this body
     */
    public static String enCodetoStringDo(final HttpEntity entity,
                                          Charset defaultCharset) throws IOException, ParseException {
        if (entity == null) {
            throw new IllegalArgumentException("HTTP entity may not be null");
        }
        InputStream instream = entity.getContent();
        if (instream == null) {
            return null;
        }
        try {
            long declaredLength = entity.getContentLength();
            if (declaredLength > Integer.MAX_VALUE) {
                throw new IllegalArgumentException(
                        "HTTP entity too large to be buffered in memory");
            }
            // A negative length means "unknown"; start with a 4 KiB buffer in that case.
            int initialCapacity = declaredLength < 0 ? 4096 : (int) declaredLength;

            Charset charset = defaultCharset != null
                    ? defaultCharset
                    : HTTP.DEF_CONTENT_CHARSET;

            Reader reader = new InputStreamReader(instream, charset);
            CharArrayBuffer buffer = new CharArrayBuffer(initialCapacity);
            char[] chunk = new char[1024];
            int read;
            while ((read = reader.read(chunk)) != -1) {
                buffer.append(chunk, 0, read);
            }
            return buffer.toString();
        } finally {
            // Closing the underlying stream also releases the connection resources
            // held by the reader wrapped around it.
            instream.close();
        }
    }

    /**
     * Appends text to the file {@code <topName>:\<tagName>/<fileName>.<type>},
     * creating the target folder first when it does not exist. The actual file
     * write is delegated to {@code write}; its result is ignored.
     *
     * @param topName  drive name placed before {@code ":\"} in the folder path
     * @param fileName file name without extension
     * @param tagName  folder path below the drive
     * @param type     file extension, without the leading dot
     * @param content  text to store
     * @author joe
     * @date 2015-3-6
     */
    public static void writeToFile(String topName, String fileName,
                                   String tagName, String type, String content) {
        File targetDir = null;
        try {
            targetDir = new File(topName + ":\\" + tagName);
            boolean alreadyPresent = targetDir.exists() || targetDir.isDirectory();
            if (!alreadyPresent) {
                if (targetDir.mkdirs()) {
                    System.out.println(" ok:创建文件夹成功！ ");
                } else {
                    System.out.println(" err:创建文件夹失败！ ");
                }
            }
        } catch (Exception e) {
            // Folder creation is best-effort; the write below is still attempted.
            e.printStackTrace();
        }
        write(targetDir + "/" + fileName + "." + type, content);
    }

    /**
     * Appends {@code content} to the text file at {@code path}, creating the file
     * when it does not exist.
     * <p>
     * The existing file is read line by line and rewritten in full, so every
     * existing line ends up terminated by {@code "\n"} before {@code content} is
     * added. Reading and writing use the platform default charset. The previous
     * file content is echoed to standard output.
     *
     * @param path    location of the file to append to
     * @param content text to add after the existing content
     * @return {@code true} when the file was rewritten; {@code false} when any
     *         exception occurred (its stack trace is printed)
     */
    public static boolean write(String path, String content) {
        BufferedReader input = null;
        BufferedWriter output = null;
        try {
            File f = new File(path);
            if (!f.exists()) {
                System.out.println("文件不存在，正在创建...");
                if (f.createNewFile()) {
                    System.out.println("文件创建成功！");
                } else {
                    System.out.println("文件创建失败！");
                }
            }

            StringBuilder merged = new StringBuilder();
            input = new BufferedReader(new FileReader(f));
            String line;
            while ((line = input.readLine()) != null) {
                merged.append(line).append('\n');
            }
            System.out.println("原文件内容：" + merged);
            // Release the read handle before the same file is reopened for writing.
            input.close();
            input = null;

            merged.append(content);
            output = new BufferedWriter(new FileWriter(f));
            output.write(merged.toString());
            output.flush();
            return true;
        } catch (Exception e) {
            // Broad on purpose: callers rely on a boolean result for every kind of
            // failure, including a null path.
            e.printStackTrace();
            return false;
        } finally {
            if (input != null) {
                try {
                    input.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
            if (output != null) {
                try {
                    output.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
    }

    /**
     * Downloads the resource at {@code fileUrl} and stores it as
     * {@code <topName>:/<tagName>/<fileName>.<type>}, creating the folder when it
     * is missing. The running byte count is printed after every chunk. I/O
     * failures are printed and not propagated.
     *
     * @param fileUrl  URL of the file to download
     * @param topName  drive name placed before {@code ":/"} in the target path
     * @param fileName target file name without extension
     * @param tagName  folder path below the drive
     * @param type     file extension, without the leading dot
     * @author joe
     * @date 2015-3-6
     */
    public void downLoadFile(String fileUrl, String topName, String fileName,
                             String tagName, String type) {
        int bytesum = 0;
        int byteread;
        InputStream inStream = null;
        FileOutputStream fs = null;
        try {
            URL url = new URL(fileUrl);
            URLConnection conn = url.openConnection();
            inStream = conn.getInputStream();

            File fileD = new File(topName + ":/" + tagName);
            if (!fileD.exists()) {
                System.out.println("正在新建目录");
                fileD.mkdirs();
            } else {
                System.out.println("目录存在");
            }

            // FileOutputStream creates the file itself, so no separate
            // createNewFile step is needed.
            File file = new File(topName + ":/" + tagName + "/" + fileName
                    + "." + type);
            fs = new FileOutputStream(file);

            byte[] buffer = new byte[1204];
            while ((byteread = inStream.read(buffer)) != -1) {
                bytesum += byteread;
                System.out.println(bytesum);
                fs.write(buffer, 0, byteread);
            }
            System.out.println("downloaded ok");
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            // Both streams must be released on every path, including failures
            // part-way through the copy.
            if (fs != null) {
                try {
                    fs.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
            if (inStream != null) {
                try {
                    inStream.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
    }

    /**
     * Command-line entry point. It only constructs a {@code CrawlMethodManager}
     * and invokes nothing on it.
     *
     * @param args command-line arguments; unused
     * @throws ClientProtocolException declared but not thrown by this body
     * @throws IOException             declared but not thrown by this body
     */
    public static void main(String[] args) throws ClientProtocolException,
            IOException {
        new CrawlMethodManager();
    }

}